# %%
import collections
import math

# %%
# Documents
docs = ["A man and a woman.", "A baby."]


# %%
def _tokenize(text):
    """Split ``text`` on whitespace and normalize every token.

    Each token is lower-cased and has leading/trailing '.' and ','
    removed. Tokens that end up empty (for example a stand-alone ".")
    are dropped so they can never become a vocabulary term.

    Args:
        text: The raw string to tokenize.

    Returns:
        A list of normalized, non-empty tokens in their original order.
    """
    cleaned = (word.lower().strip('.,') for word in text.split())
    return [word for word in cleaned if word]


def build_vocab(documents):
    """Return the sorted list of distinct tokens found in ``documents``.

    Uses the same tokenizer as ``vectorize`` so that vocabulary terms and
    vector components always line up.

    Args:
        documents: An iterable of strings.

    Returns:
        A sorted list of unique tokens.
    """
    return sorted({token for doc in documents for token in _tokenize(doc)})


# Build vocabulary
vocab = build_vocab(docs)


# %%
# Function to convert text into vector
def vectorize(text, vocabulary=None):
    """Convert ``text`` into a term-count vector.

    Args:
        text: The string to vectorize.
        vocabulary: Optional ordered sequence of terms to count. When
            omitted, the module-level ``vocab`` is used.

    Returns:
        A list of ints, one per vocabulary term (in vocabulary order),
        giving how often that term occurs in ``text``. Terms of ``text``
        that are not in the vocabulary are ignored.
    """
    terms = vocab if vocabulary is None else vocabulary
    counts = collections.Counter(_tokenize(text))
    return [counts[term] for term in terms]


# %%
# Function to calculate cosine similarity
def cosine_sim(a, b):
    """Return the cosine similarity of two equal-length numeric vectors.

    Args:
        a: A sequence of numbers.
        b: A sequence of numbers with the same length as ``a``.

    Returns:
        ``dot(a, b) / (|a| * |b|)`` as a float, or ``0.0`` when either
        vector has zero magnitude (the similarity is undefined there).

    Raises:
        ValueError: If ``a`` and ``b`` differ in length. ``zip`` would
            otherwise truncate the dot product while the magnitudes still
            use every component, producing a meaningless score.
    """
    if len(a) != len(b):
        raise ValueError(
            "cosine_sim requires vectors of equal length, got %d and %d"
            % (len(a), len(b))
        )
    dot = sum(x * y for x, y in zip(a, b))
    mag_a = math.sqrt(sum(x * x for x in a))
    mag_b = math.sqrt(sum(y * y for y in b))
    if not mag_a or not mag_b:
        # Keep the return type a float on every path.
        return 0.0
    return dot / (mag_a * mag_b)


# %%
# Query
query = "woman"
q_vec = vectorize(query)

# %%
# Compare query with each document
for doc_num, doc in enumerate(docs, start=1):
    sim = cosine_sim(vectorize(doc), q_vec)
    print("Doc", doc_num, "similarity:", round(sim, 3))